{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 导入必要的工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pandas\n",
    "import numpy as numpy\n",
    "# plotting\n",
    "import seaborn as seaborn\n",
    "import matplotlib.pyplot as plot\n",
    "import warnings\n",
    "from sklearn import metrics\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load FE_day.csv from the working directory.\n",
    "FE_train = pandas.read_csv(\"FE_day.csv\")\n",
    "\n",
    "# Inputs: every column except the target 'cnt' and the 'yr' / 'instant' columns.\n",
    "X_Feature = FE_train.drop(['cnt', 'yr', 'instant'], axis=1)\n",
    "# Target: the 'cnt' column as a NumPy array.\n",
    "Y_Feature = FE_train['cnt'].values\n",
    "# Input column names, kept so fitted coefficients can be labelled.\n",
    "columns = X_Feature.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 分割数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(584, 10)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Hold out a random 20% of the rows as the test set; the fixed seed keeps the split repeatable.\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(\n",
    "    X_Feature,\n",
    "    Y_Feature,\n",
    "    test_size=0.2,\n",
    "    random_state=33\n",
    ")\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# One scaler for the input features and a separate one for the target.\n",
    "ss_X, ss_y = StandardScaler(), StandardScaler()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learn the scaling statistics from the training split only, then apply them to both splits.\n",
    "ss_X.fit(X_train)\n",
    "X_train = ss_X.transform(X_train)\n",
    "X_test = ss_X.transform(X_test)\n",
    "\n",
    "# The target is reshaped to a single column before it is scaled the same way.\n",
    "ss_y.fit(Y_train.reshape(-1, 1))\n",
    "Y_train = ss_y.transform(Y_train.reshape(-1, 1))\n",
    "Y_test = ss_y.transform(Y_test.reshape(-1, 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 最小二乘线性回归模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[0.2857841670063437]</td>\n",
       "      <td>atemp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[0.2625526698481601]</td>\n",
       "      <td>season</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0.22970089552962258]</td>\n",
       "      <td>temp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[0.04359549655316057]</td>\n",
       "      <td>workingday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0.036516948661484014]</td>\n",
       "      <td>weekday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[-0.03673164385183661]</td>\n",
       "      <td>mnth</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[-0.04055393245568578]</td>\n",
       "      <td>holiday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[-0.12462537172918702]</td>\n",
       "      <td>windspeed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[-0.1448473653126937]</td>\n",
       "      <td>hum</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.16150778680341177]</td>\n",
       "      <td>weathersit</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     coef     columns\n",
       "5    [0.2857841670063437]       atemp\n",
       "0    [0.2625526698481601]      season\n",
       "4   [0.22970089552962258]        temp\n",
       "9   [0.04359549655316057]  workingday\n",
       "3  [0.036516948661484014]     weekday\n",
       "1  [-0.03673164385183661]        mnth\n",
       "8  [-0.04055393245568578]     holiday\n",
       "7  [-0.12462537172918702]   windspeed\n",
       "6   [-0.1448473653126937]         hum\n",
       "2  [-0.16150778680341177]  weathersit"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Ordinary least squares with scikit-learn's default settings.\n",
    "lr = LinearRegression()\n",
    "lr.fit(X_train, Y_train)\n",
    "\n",
    "# Predictions on both splits: test for the RMSE, train for the residual plot.\n",
    "y_test_pred_lr = lr.predict(X_test)\n",
    "y_train_pred_lr = lr.predict(X_train)\n",
    "\n",
    "# Per-feature weights; the magnitude of a coefficient can be read as that\n",
    "# feature's importance. coef_ is flattened with ravel() so each row holds a\n",
    "# plain float instead of a one-element array (Y_train is a single column).\n",
    "fs = pandas.DataFrame(\n",
    "    {\"columns\": list(columns), \"coef\": numpy.ravel(lr.coef_)},\n",
    "    columns=[\"columns\", \"coef\"]\n",
    ")\n",
    "fs.sort_values(by='coef', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The trained model is: y =[[ 0.26255267 -0.03673164 -0.16150779  0.03651695  0.2297009   0.28578417\n",
      "  -0.14484737 -0.12462537 -0.04055393  0.0435955 ]]x + [-7.21196599e-17]\n",
      "LinearRegression RMSE: 0.6699933713392981\n"
     ]
    }
   ],
   "source": [
    "# print() is called with a single pre-formatted string so the cell produces the\n",
    "# same text under Python 2 and Python 3.\n",
    "print(\"The trained model is: y ={}x + {}\".format(lr.coef_, lr.intercept_))\n",
    "\n",
    "# RMSE on the held-out split (Y_test is standardized, so this is in standardized units).\n",
    "rmse_lr = numpy.sqrt(metrics.mean_squared_error(Y_test, y_test_pred_lr))\n",
    "print(\"LinearRegression RMSE: {}\".format(rmse_lr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAFsCAYAAADos0H8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGcVJREFUeJzt3X+0V3Wd7/HnWzhJ41EIBUXIwFLzJ2BHg2sqRaYzmDj3Vtb1By5RW2azrKZf5m08OXanll67eXWacfqhJiXq1NDN5Rq7jAzpNR2OMaahwZTcIEKkFPFHcuB9//hu6ADncL6cH3w/8H0+1mKd797f/d37/d17cV7n89mfvXdkJpIkqTx7NboASZLUPUNakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCGtphcRT0bEtEbX0UgR8ecR8euIWB8Rk3fhdtdHxKE9vHdhRDw4QNt5JiLePRDrknYlQ1p7tO5+OW/7yz8zj87MBb2sZ3xEZEQMHaRSG+164KOZ2ZqZP932zeq7v1SF6sqIuCEihvR3o9X2ftnf9Uh7KkNaKkAB4f8m4MlelpmYma3AqcA5wEWDXpXU5AxpNb2ure2IODEiFkXEuohYHRE3VIstrH4+X7Ump0bEXhHx3yJieUQ8GxG3R8TwLuu9oHpvbUR8fpvttEfEPRFxR0SsAy6stv1wRDwfEasi4qaIeF2X9WVEfCQilkbEixHx1xHx5uoz6yLirq7Lb/Mdu601IvaOiPXAEODfI+I/ettfmbkMeAiY1GX9wyPiG1XdKyPi2s0t7Yh4S0T8a0S8EBHPRcTcbb7TW6rX+0fED6rv8ijw5i7LbdeTERELIuLi6vWbI+Jfqn39XETMiYgRPeyLno6xVBxDWtraV4GvZuZ+1ELirmr+KdXPEVUX7cPAhdW/dwKHAq3ATQARcRTwt8C5wBhgODB2m23NBO4BRgBzgI3Ax4EDgKnAdOAj23zmDOBtwBTg08At1TbeCBwDfKiH79VtrZn5h6p1DLWW8pu7//gfRcRbgZOBZV1m3wZ0Am8BJgPvAS6u3vtr4H7gDcA44H/1sOqbgVep7a+L2LmWegB/AxwMHEltf7T3sGxPx1gqjiGtZvBPVev0+Yh4nlp49mQD8JaIOCAz12fmT3aw7LnADZn5y8xcD1wJfLBq7b0P+N+Z+WBmvgb8FbDtjfIfzsx/ysxNmflKZnZk5k8yszMznwH+nlrXcldfzsx1mfkk8ARwf7X9F4D7qAXkztZar8ci4iVgCbCAaj9GxIHAnwIfy8yXMvNZ4CvAB6vPbaDWnX5wZr6amdsNBqta3f8F+KtqHU9QC/66ZOayzPxR9UfHGuAGtt93m+3MMZYaypBWMzg7M0ds/sf2rdOuZgOHA09FxL9FxJk7WPZgYHmX6eXAUODA6r1fb34jM18G1m7z+V93nYiIwyPihxHx26oL/L9Ta1V3tbrL61e6mW6lezuqtV7HV+s/B3g7sE81/01AC7Cqyx9Cfw+Mrt7/NLWW7qNRG0nfXQt5VFVP132yvJvluhURoyPizqqrfR1wB9vvu8125hhLDWVIS11k5tLM/BC1gPkycE9E7MP2rWCA31ALqM0OodbluxpYRa1rF4CIeD2w/7ab22b6a8BTwGFVV+znqIXbQNhRrXXLmruAh6n1DkAtWP8AHNDlj6H9MvPo6jO/zcxLMvNg4MPA324+D93FmqqeN25T42YvVT//pMu8g7q8/htq+/O4at+dRw/7bgfHWCqOIS11ERHnRcSozNwEPF/N3kgtRDZRO5+72XeBj0fEhIhopdbynZuZndTONb83Iv5TNZjrC/QeuPsC64D11Xnfywbsi+241r74EnBpRByUmauonXP+HxGxXzVI7c0RcSpARLw/Ijb/wfJ7amG6sevKMnMj8D2gPSL+pDqnP6vL+2uAlcB5ETGkao13PX++L7Ce2sC+scCneip8
B8dYKo4hLW3tDODJasTzV4EPVudRXwa+CDxUdelOAb4JfJvayO9fURv09BcA1TnjvwDupNaqfhF4llqLsyefBP5rtew/AHN3sOzO6rHWvsjMnwH/yh/D8ALgdcDPqQXxPdQGgAGcADxS7dMfAFdk5q+6We1HqXWn/xa4FfjWNu9fUm1vLXA08H+7vPcFat3xLwD3Ugv8nnR7jHf8jaXGiMzuevEkDaSq9fo8ta7s7gJKkrZjS1oaJBHx3qrrdh9qd/T6GfBMY6uStDsxpKXBM5PagK3fAIdR61a160pS3ezuliSpULakJUkq1C69qf8BBxyQ48eP35WblCSpKB0dHc9l5qh6lt2lIT1+/HgWLVq0KzcpSVJRIqLuu+nZ3S1JUqEMaUmSCmVIS5JUqF16TlqS1L0NGzawYsUKXn3VO5TuKYYNG8a4ceNoaWnp8zoMaUkqwIoVK9h3330ZP348EQP18DM1Smaydu1aVqxYwYQJE/q8Hru7JakAr776Kvvvv78BvYeICPbff/9+94wY0pJUCAN6zzIQx9OQliSpUJ6TlqQCtbfv+vUNGTKEY489ls7OTiZMmMC3v/1tRowYsdPbuvjii/nEJz7BUUcdtdX8W2+9lUWLFnHTTTft9DoBWltbWb9+fV3LTps2jeuvv562trYt8xYtWsTtt9/OjTfe2KftN4ItaUkSAK9//etZvHgxTzzxBCNHjuTmm2/u03q+/vWvbxfQJWhraxv0gN64ceOArs+QliRtZ+rUqaxcuXLL9HXXXccJJ5zAcccdx9VXXw3ASy+9xIwZM5g4cSLHHHMMc+fOBWqt2M23gP7Wt77F4YcfzqmnnspDDz20ZX0XXngh99xzz5bp1tZWANavX8/06dM5/vjjOfbYY5k3b952ta1atYpTTjmFSZMmccwxx/DjH/+4ru+0YMECzjzzTADa29u56KKLmDZtGoceeuhW4X3HHXdw4oknMmnSJD784Q9vCd7LLruMtrY2jj766C37AGq3vL7mmmt4xzvewd13311XLfWyu1uStJWNGzcyf/58Zs+eDcD999/P0qVLefTRR8lMzjrrLBYuXMiaNWs4+OCDuffeewF44YUXtlrPqlWruPrqq+no6GD48OG8853vZPLkyTvc9rBhw/j+97/Pfvvtx3PPPceUKVM466yzthqE9Z3vfIfTTz+dq666io0bN/Lyyy/36Xs+9dRTPPDAA7z44oscccQRXHbZZSxbtoy5c+fy0EMP0dLSwkc+8hHmzJnDBRdcwBe/+EVGjhzJxo0bmT59Oo8//jjHHXfclroffPDBPtWxI4a0JAmAV155hUmTJvHMM8/wtre9jdNOOw2ohfT999+/JWDXr1/P0qVLOfnkk/nkJz/JZz7zGc4880xOPvnkrdb3yCOPMG3aNEaNqj3w6ZxzzuEXv/jFDmvITD73uc+xcOFC9tprL1auXMnq1as56KCDtixzwgkncNFFF7FhwwbOPvtsJk2a1KfvO2PGDPbee2/23ntvRo8ezerVq5k/fz4dHR2ccMIJW/bJ6NGjAbjrrru45ZZb6OzsZNWqVfz85z/fEtLnnHNOn2rojd3dkiTgj+ekly9fzmuvvbblnHRmcuWVV7J48WIWL17MsmXLmD17NocffjgdHR0ce+yxXHnllVxzzTXbrbOny5CGDh3Kpk2btqz/tddeA2DOnDmsWbOGjo4OFi9ezIEHHrjdtcannHIKCxcuZOzYsZx//vncfvvtffq+e++995bXQ4YMobOzk8xk1qxZW77r008/TXt7O7/61a+4/vrrmT9/Po8//jgzZszYqq599tmnTzX0xpa01IudGWU70CNypUYYPnw4N954IzNnzuSyyy7j9NNP5/Of/zznnnsura2trFy5kpaWFjo7Oxk5ciTnnXcera2t3HrrrVut5+1vfztXXHEFa9euZb/99uPuu+9m4sSJQO08bkdHBx/4wAeYN28eGzZsAGpd5qNHj6alpYUHHniA
5cu3f6rj8uXLGTt2LJdccgkvvfQSjz32GBdccMGAfPfp06czc+ZMPv7xjzN69Gh+97vf8eKLL7Ju3Tr22Wcfhg8fzurVq7nvvvuYNm3agGxzRwxpSSpQo//gmzx5MhMnTuTOO+/k/PPPZ8mSJUydOhWoDfK64447WLZsGZ/61KfYa6+9aGlp4Wtf+9pW6xgzZgzt7e1MnTqVMWPGcPzxx28ZhHXJJZcwc+ZMTjzxRKZPn76lJXruuefy3ve+l7a2NiZNmsRb3/rW7WpbsGAB1113HS0tLbS2tvbYkp4xY8aW+2ZPnTqVyy+/vNfvfdRRR3Httdfynve8h02bNtHS0sLNN9/MlClTmDx5MkcffTSHHnooJ510Uv07sx8iM3fJhgDa2tpy84g/aXdhS1q7wpIlSzjyyCMbXYYGWHfHNSI6MrOth49sxXPSkiQVypCWJKlQhrQkFWJXnn7U4BuI42lIS1IBhg0bxtq1aw3qPcTm50kPGzasX+txdLckFWDcuHGsWLGCNWvWNLoUDZBhw4Yxbty4fq3DkJakArS0tDBhwoRGl6HC2N0tSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQhrQkSYUypCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBWq7pCOiCER8dOI+GE1PSEiHomIpRExNyJeN3hlSpLUfHamJX0FsKTL9JeBr2TmYcDvgdkDWZgkSc2urpCOiHHADODr1XQA7wLuqRa5DTh7MAqUJKlZ1duS/p/Ap4FN1fT+wPOZ2VlNrwDGdvfBiLg0IhZFxKI1a9b0q1hJkppJryEdEWcCz2ZmR9fZ3Sya3X0+M2/JzLbMbBs1alQfy5QkqfkMrWOZk4CzIuLPgGHAftRa1iMiYmjVmh4H/GbwypQkqfn02pLOzCszc1xmjgc+CPxLZp4LPAC8r1psFjBv0KqUJKkJ9ec66c8An4iIZdTOUX9jYEqSJElQX3f3Fpm5AFhQvf4lcOLAlyRJksA7jkmSVCxDWpKkQhnSkiQVypCWJKlQhrQkSYUypCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSrUTj2qUtqTtLc3ugJJ2jFb0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhfAqW1AD1PoHLJ3VJzc2WtCRJhTKkJUkqlCEtSVKhDGlJkgplSEuSVChHd0sDyNHYkgaSLWlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQhrQkSYXyEixJuy0fVKI9nS1pSZIKZUhLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVA+BUt7HJ94JGlPYUtakqRCGdKSJBWq15COiGER8WhE/HtEPBkRX6jmT4iIRyJiaUTMjYjXDX65kiQ1j3pa0n8A3pWZE4FJwBkRMQX4MvCVzDwM+D0we/DKlCSp+fQa0lmzvppsqf4l8C7gnmr+bcDZg1KhJElNqq7R3RExBOgA3gLcDPwH8HxmdlaLrADG9vDZS4FLAQ455JD+1itpN7Uzo+4doS/V1DVwLDM3ZuYkYBxwInBkd4v18NlbMrMtM9tGjRrV90olSWoyOzW6OzOfBxYAU4AREbG5JT4O+M3AliZJUnOrZ3T3qIgYUb1+PfBuYAnwAPC+arFZwLzBKlKSpGZUzznpMcBt1XnpvYC7MvOHEfFz4M6IuBb4KfCNQaxTkqSm02tIZ+bjwORu5v+S2vlpSZI0CLzjmCRJhfIBG1LB6r0UyUuWpD2TLWlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQhrQkSYUypCVJKpQh
LUlSoQxpSZIKZUhLklQoQ1qSpEIZ0pIkFcoHbEjql93h4R4+qES7K1vSkiQVypCWJKlQhrQkSYUypCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQQxtdgKQytbc357alktiSliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKC/BkqTKzlz65WVi2hVsSUuSVChDWpKkQhnSkiQVypCWJKlQhrQkSYUypCVJKpSXYEl7AC8HKle9x8ZjqO7YkpYkqVCGtCRJheo1pCPijRHxQEQsiYgnI+KKav7IiPhRRCytfr5h8MuVJKl51NOS7gT+MjOPBKYAl0fEUcBngfmZeRgwv5qWJEkDpNeQzsxVmflY9fpFYAkwFpgJ3FYtdhtw9mAVKUlSM9qpc9IRMR6YDDwCHJiZq6AW5MDoHj5zaUQsiohFa9as6V+1kiQ1kbpDOiJagX8EPpaZ6+r9XGbekpltmdk2atSovtQoSVJTqiukI6KFWkDPyczvVbNXR8SY6v0xwLODU6IkSc2pntHdAXwDWJKZN3R56wfArOr1LGDewJcnSVLzqueOYycB5wM/i4jF1bzPAV8C7oqI2cD/A94/OCVKktSceg3pzHwQiB7enj6w5UiSpM2845gkSYXyARvabfgAAknNxpa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhDGlJkgplSEuSVCifgiVJfeBT2bQr2JKWJKlQhrQkSYUypCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkq1NBGF6Dm1t7e6AokqVy2pCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpEI5uluDwlHbktR/tqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQhrQkSYXqNaQj4psR8WxEPNFl3siI+FFELK1+vmFwy5QkqfnU05K+FThjm3mfBeZn5mHA/GpakiQNoF5DOjMXAr/bZvZM4Lbq9W3A2QNclyRJTa+v56QPzMxVANXP0QNXkiRJAhg62BuIiEuBSwEOOeSQwd6cJO3x2tsHdjmVq68t6dURMQag+vlsTwtm5i2Z2ZaZbaNGjerj5iRJaj59DekfALOq17OAeQNTjiRJ2qyeS7C+CzwMHBERKyJiNvAl4LSIWAqcVk1LkqQB1Os56cz8UA9vTR/gWiRJUhfecUySpEIN+uhuSVLvHImt7tiSliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKC/Bkpd+SFKhbElLklQoQ1qSpEIZ0pIkFcqQliSpUIa0JEmFMqQlSSqUIS1JUqEMaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVamijC9DgaW9vdAWSdgf1/q7wd8quZ0takqRCGdKSJBXKkJYkqVCGtCRJhTKkJUkqlCEtSVKhvARLklSXnbkEy8u1BoYtaUmSCmVIS5JUKENakqRCGdKSJBXKkJYkqVCGtCRJhfISrIL4JBpJUle2pCVJKpQhLUlSoQxpSZIKZUhLklQoQ1qSpELt1qO7d4ebvTsSW5LUV7akJUkqlCEtSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVare+BKuRGnlplZd1SarH7vB7qlHLDdY6B5otaUmSCmVI
S5JUKENakqRC9SukI+KMiHg6IpZFxGcHqihJktSPkI6IIcDNwJ8CRwEfioijBqowSZKaXX9a0icCyzLzl5n5GnAnMHNgypIkSZGZfftgxPuAMzLz4mr6fODtmfnRbZa7FLi0mjwCeLrv5fbbAcBzDdx+s3P/N57HoLHc/41XwjF4U2aOqmfB/lwnHd3M2y7xM/MW4JZ+bGfARMSizGxrdB3Nyv3feB6DxnL/N97udgz60929Anhjl+lxwG/6V44kSdqsPyH9b8BhETEhIl4HfBD4wcCUJUmS+tzdnZmdEfFR4J+BIcA3M/PJAatscBTR7d7E3P+N5zFoLPd/4+1Wx6DPA8ckSdLg8o5jkiQVypCWJKlQTRfSEXFdRDwVEY9HxPcjYkSja2omEfH+iHgyIjZFxG5zGcTuzlv4NlZEfDMino2IJxpdSzOKiDdGxAMRsaT6/XNFo2uqV9OFNPAj4JjMPA74BXBlg+tpNk8A/xlY2OhCmoW38C3CrcAZjS6iiXUCf5mZRwJTgMt3l/8DTRfSmXl/ZnZWkz+hdn23dpHMXJKZjbzrXDPyFr4NlpkLgd81uo5mlZmrMvOx6vWLwBJgbGOrqk/ThfQ2LgLua3QR0iAbC/y6y/QKdpNfUNJAi4jxwGTgkcZWUp/+3Ba0WBHxf4CDunnrqsycVy1zFbUukDm7srZmUM/+1y5V1y18pT1dRLQC/wh8LDPXNbqeeuyRIZ2Z797R+xExCzgTmJ5eKD7getv/2uW8ha+aXkS0UAvoOZn5vUbXU6+m6+6OiDOAzwBnZebLja5H2gW8ha+aWkQE8A1gSWbe0Oh6dkbThTRwE7Av8KOIWBwRf9fogppJRPx5RKwApgL3RsQ/N7qmPV01UHLzLXyXAHftBrfw3aNExHeBh4EjImJFRMxudE1N5iTgfOBd1e/9xRHxZ40uqh7eFlSSpEI1Y0takqTdgiEtSVKhDGlJkgplSEuSVChDWpKkQhnSkiQVypCWJKlQ/x/SEpTL1p0KSQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 504x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of training residuals (actual minus fitted) for the linear model.\n",
    "residuals_lr = numpy.ravel(Y_train - y_train_pred_lr)\n",
    "\n",
    "fig, ax = plot.subplots(figsize=(7, 5))\n",
    "ax.hist(residuals_lr, bins=40, label='Residuals Linear', color='b', alpha=.5)\n",
    "ax.set_title(\"Histogram of Residuals\")\n",
    "ax.set_xlabel(\"Residual (standardized cnt)\")\n",
    "ax.set_ylabel(\"Number of samples\")\n",
    "ax.legend(loc='best')\n",
    "# tight_layout() only accounts for artists that already exist, so it runs last.\n",
    "fig.tight_layout();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## L2正则 --> 岭回归\n",
    "### 利用5折交叉验证求出超参alpha（详见 fivefold.py）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练岭回归模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0001\n"
     ]
    }
   ],
   "source": [
    "import fivefold as ff\n",
    "# fivefold is a project-local module (fivefold.py) that is not shown in this notebook.\n",
    "# NOTE(review): the search is given the full, unscaled X_Feature / Y_Feature rather than\n",
    "# the standardized training split used to fit the models below - confirm this is intended.\n",
    "ridge=ff.BuildRidgeModel(X_Feature,Y_Feature)\n",
    "# presumably BuildRidgeModel stores the selected alpha on the module; verify in fivefold.py\n",
    "ridge_alpha=ff.ridge_alpha\n",
    "print(ridge_alpha)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ridge RMSE: 0.6699933850637039\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import Ridge\n",
    "\n",
    "# Fit a ridge model on the standardized training split using ridge_alpha.\n",
    "# alpha is passed by keyword so the call does not depend on positional-argument order.\n",
    "ridge = Ridge(alpha=ridge_alpha)\n",
    "ridge.fit(X_train, Y_train)\n",
    "\n",
    "# Predictions for the held-out test split.\n",
    "y_test_pred_ridge = ridge.predict(X_test)\n",
    "\n",
    "# Single-argument print() gives the same text under Python 2 and Python 3.\n",
    "rmse_ridge = numpy.sqrt(metrics.mean_squared_error(Y_test, y_test_pred_ridge))\n",
    "print(\"Ridge RMSE: {}\".format(rmse_ridge))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0001\n",
      "Lasso RMSE: 0.6713040712382308\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import Lasso\n",
    "\n",
    "# fivefold (imported as ff in an earlier cell) is project-local and not shown here.\n",
    "lasso = ff.BuildLassoModel(X_Feature, Y_Feature)\n",
    "# NOTE(review): this reads ff.ridge_alpha, not a lasso-specific attribute - confirm that\n",
    "# BuildLassoModel really publishes its selected alpha under this name.\n",
    "lasso_alpha = ff.ridge_alpha\n",
    "print(lasso_alpha)\n",
    "\n",
    "# Bind the new estimator to `lasso`. Assigning it to `Lasso` would shadow the class\n",
    "# (breaking a re-run of this cell) and leave the model built above as the one that is fitted.\n",
    "lasso = Lasso(alpha=lasso_alpha)\n",
    "lasso.fit(X_train, Y_train)\n",
    "\n",
    "# Predictions for the held-out test split.\n",
    "y_test_pred_lasso = lasso.predict(X_test)\n",
    "\n",
    "# Single-argument print() gives the same text under Python 2 and Python 3.\n",
    "rmse_lasso = numpy.sqrt(metrics.mean_squared_error(Y_test, y_test_pred_lasso))\n",
    "print(\"Lasso RMSE: {}\".format(rmse_lasso))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   coef_lasso                 coef_lr               coef_ridge     columns\n",
      "0    0.247118    [0.2625526698481601]    [0.26255250454188506]      season\n",
      "1   -0.021210  [-0.03673164385183661]   [-0.03673149420155081]        mnth\n",
      "2   -0.160875  [-0.16150778680341177]    [-0.1615077882907542]  weathersit\n",
      "3    0.033967  [0.036516948661484014]    [0.03651693545242447]     weekday\n",
      "4    0.228693   [0.22970089552962258]    [0.22970138496556108]        temp\n",
      "5    0.286330    [0.2857841670063437]     [0.2857836503749048]       atemp\n",
      "6   -0.142326   [-0.1448473653126937]   [-0.14484732705810965]         hum\n",
      "7   -0.122094  [-0.12462537172918702]   [-0.12462536570988915]   windspeed\n",
      "8   -0.039596  [-0.04055393245568578]  [-0.040553937183322245]     holiday\n",
      "9    0.041959   [0.04359549655316057]    [0.04359549288891059]  workingday\n"
     ]
    }
   ],
   "source": [
    "# Side-by-side coefficients of the three fitted models, one row per input feature.\n",
    "# numpy.ravel() flattens each coef_ so every cell holds a plain float, whatever shape\n",
    "# the estimator reports.\n",
    "fs = pandas.DataFrame(\n",
    "    {\n",
    "        \"columns\": list(columns),\n",
    "        \"coef_lr\": numpy.ravel(lr.coef_),\n",
    "        \"coef_ridge\": numpy.ravel(ridge.coef_),\n",
    "        \"coef_lasso\": numpy.ravel(lasso.coef_)\n",
    "    },\n",
    "    columns=[\"columns\", \"coef_lr\", \"coef_ridge\", \"coef_lasso\"]\n",
    ")\n",
    "# sort_values() returns a new frame, so the result has to be kept for the order to show.\n",
    "fs = fs.sort_values(by='coef_lr', ascending=False)\n",
    "fs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 根据参数和RMSE,选择岭回归模型最为合适。"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
